#Loading Packages
import pandas as pd
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from IPython.display import HTML
# Read the world population CSV into a DataFrame
csv_path = "C:/Users/Nathan/Documents/Portfolio/Portfolio Data/World Population EDA/world_population.csv"
df = pd.read_csv(filepath_or_buffer=csv_path)
# Display the dataset dimensions as a (rows, columns) tuple
df.shape
(234, 17)
The dataset contains population information on 234 different countries and territories across 17 variables.
# Count the non-missing entries in every column. Despite its name, `missing`
# holds the number of NON-null values per column, so a column with no missing
# data has a count equal to the number of rows in the dataset.
missing = df.notnull().sum(axis=0)
# Bar plot with one bar per dataset variable showing its non-missing count
fig = px.bar(
    x=missing.index,
    y=missing.values,
    text=missing.values,
    # Take the row count from the data so the title stays correct if the
    # dataset changes
    title=f'World Population Dataset: Total Number of Data Points (out of {len(df)} rows)',
)
# Hover label shows the variable name and its non-missing count
fig.update_traces(hovertemplate='<br> Variable: %{x} </br> Number of Instances: %{y}')
fig.update_layout(xaxis_title='Dataset Variables', yaxis_title='Number of Instances')
fig.show(renderer='notebook')
There do not appear to be any missing values in the dataset.